Conceptual


Q1

(a)

\(a_1=\beta_0,b_1=\beta_1,c_1=\beta_2,d_1=\beta_3\)

(b)

\(a_2=\beta_0-\beta_4\xi^3,b_2=\beta_1+3\beta_4\xi^2,c_2=\beta_2-3\beta_4\xi,d_2=\beta_3+\beta_4\)

(c)

\[ \begin{aligned} f_2(\xi)&=\beta_0-\beta_4\xi^3+\beta_1\xi+3\beta_4\xi^3+\beta_2\xi^2-3\beta_4\xi^3+\beta_3\xi^3+\beta_4\xi^3\\ &=\beta_0+\beta_1\xi+\beta_2\xi^2+\beta_3\xi^3=f_1(\xi)\\ \end{aligned} \]

(d)

\[ \begin{aligned} f^\prime_2(x)&=\beta_1+3\beta_4\xi^2+(2\beta_2-6\beta_4\xi)x+(3\beta_4+3\beta_3)x^2\\ f^\prime_1(x)&=\beta_1+2\beta_2x+3\beta_3x^2 \end{aligned}\\ \begin{aligned} \rightarrow f^\prime_2(\xi)&=\beta_1+3\beta_4\xi^2+2\beta_2\xi-6\beta_4\xi^2+3\beta_4\xi^2+3\beta_3\xi^2\\ &=\beta_1+2\beta_2\xi+3\beta_3\xi^2=f^\prime_1(\xi) \end{aligned} \]

(e)

\[ \begin{aligned} f^{\prime\prime}_2(x)&=2\beta_2-6\beta_4\xi+(6\beta_4+6\beta_3)x\\ f^{\prime\prime}_1(x)&=2\beta_2+6\beta_3x \end{aligned}\\ \begin{aligned} \rightarrow f^{\prime\prime}_2(\xi)&=2\beta_2-6\beta_4\xi+6\beta_4\xi+6\beta_3\xi\\ &=2\beta_2+6\beta_3\xi=f^{\prime\prime}_1(\xi) \end{aligned} \]

Q2
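
All parts refer to the same criterion; recall from the exercise that

\[ \hat g=\arg\min_g\left(\sum_{i=1}^n\big(y_i-g(x_i)\big)^2+\lambda\int\big[g^{(m)}(x)\big]^2\,dx\right) \]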

(a)

When \(\lambda\) is very large, the penalty term dominates the criterion, so the minimizer must satisfy \(g^{(0)}=g=0\). Hence \(\hat g=0\), the zero function.

(b)

When \(\lambda\) is very large, \(g^{(1)}=0\) forces \(g\) to be a constant function; the constant minimizing the RSS is the sample mean, so \(\hat g(x)=\bar y\).

(c)

When \(\lambda\) is very large, \(g^{(2)}=0\) forces \(g\) to be a linear function; \(\hat g\) is the least squares line.

(d)

When \(\lambda\) is very large, \(g^{(3)}=0\) forces \(g\) to be a quadratic function; \(\hat g\) is the least squares quadratic fit.

(e)

When \(\lambda=0\), the penalty term vanishes and \(\hat g\) simply minimizes the training RSS, so \(\hat g\) can be any function that interpolates (passes exactly through) every training point.

Q3

x1=seq(-2,1,0.01)                 # region where the truncated quadratic term is zero
y1=1+x1                           # f(x) = 1 + x for x < 1
plot(x1,y1,xlim=c(-2,2),ylim=c(-5,5))
x2=seq(1,2,0.01)
y2=1+x2-2*(x2-1)^2                # the (x-1)^2 term is active for x >= 1
points(x2,y2)

The curve is linear (slope 1) for \(x<1\) and quadratic for \(x\ge1\), with one knot at \(x=1\).
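
Explicitly, the fitted curve is

\[ f(x)=\begin{cases}1+x, & -2\le x<1\\ 1+x-2(x-1)^2, & 1\le x\le 2\end{cases} \]

so both the value and the slope are continuous at the knot, while the curvature changes there.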

Q4

x1=seq(-2,0,0.01)
y1=rep(1,length(x1))              # b1 = b2 = 0, so f(x) = beta0 = 1
plot(x1,y1,xlim=c(-2,2),ylim=c(-3,3))
x2=seq(0,1,0.01)
y2=rep(2,length(x2))              # b1 = 1, so f(x) = 1 + 1 = 2
points(x2,y2)
x3=seq(1,2,0.01)
y3=3-x3                           # b1 = 1 - (x-1), so f(x) = 3 - x
points(x3,y3)

#Another way to plot using ggplot
# x.1 <- seq(-6, 0, 0.1)  # [-6,0)
# x.2 <- seq(0, 1, 0.1)   # [0,1)
# x.3 <- seq(1, 2, 0.1)   # [1,2]
# x.4 <- seq(2, 3, 0.1)   # (2,3)
# x.5 <- seq(3, 4, 0.1)   # [3,4]
# x.6 <- seq(4, 5, 0.1)   # (4,5]
# x.7 <- seq(5, 6, 0.1)   # (5,6)
# y.1 <- rep(1, length(x.1)) 
# y.2 <- rep(2, length(x.2))
# y.3 <- 3 - x.3
# y.4 <- rep(1, length(x.4))
# y.5 <- 3*x.5 - 8
# y.6 <- rep(4, length(x.6))
# y.7 <- rep(1, length(x.7))
# df <- data.frame(X = c(x.1,x.2,x.3,x.4,x.5,x.6,x.7),
#                  Y = c(y.1,y.2,y.3,y.4,y.5,y.6,y.7))
# p <- ggplot(df, aes(x=X,y=Y)) + geom_line(size=1.5)
# rect <- data.frame(xmin=-2, xmax=2, ymin=-Inf, ymax=Inf)
# p + geom_rect(data=rect, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax),
#               fill="grey20",
#               alpha=0.4,
#               inherit.aes = FALSE)

There are two knots in the plotted range, at \(x=0\) and \(x=1\).
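
On the plotted range the curve is

\[ f(x)=\begin{cases}1, & -2\le x<0\\ 2, & 0\le x\le 1\\ 3-x, & 1<x\le 2\end{cases} \]

with a jump discontinuity at \(x=0\); at \(x=1\) the curve is continuous but changes from constant to linear.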

Q5

(a)

\(\hat g_2\). The penalty on \(g^{(3)}\) is stricter than the penalty on \(g^{(4)}\), so \(\hat g_2\) is the more flexible fit and will track the training data more closely, giving the smaller training RSS.

(b)

Either could have the smaller test RSS; it depends on the true form of \(f\). If the extra flexibility of \(\hat g_2\) captures real structure it will win; if it merely overfits, \(\hat g_1\) will win.

(c)

When \(\lambda=0\) the penalty terms drop out, so both problems minimize the same RSS: \(\hat g_1=\hat g_2\), and the training and test RSS are identical.


Applied


Q6

(a)

library(ISLR)
library(boot)
set.seed(1)
cv.error.10=rep(0,10)
for(i in 1:10){
  poly.fit=glm(wage~poly(age,i),data=Wage)
  cv.error.10[i]=cv.glm(Wage,poly.fit,K=10)$delta[1]   # 10-fold CV error
}
degree=which.min(cv.error.10)
poly.fit=lm(wage~poly(age,degree),data=Wage)
fit.1=lm(wage~poly(age,1),data=Wage)
fit.2=lm(wage~poly(age,2),data=Wage)
fit.3=lm(wage~poly(age,3),data=Wage)
fit.5=lm(wage~poly(age,5),data=Wage)
anova(fit.1,fit.2,fit.3,poly.fit,fit.5)
## Analysis of Variance Table
## 
## Model 1: wage ~ poly(age, 1)
## Model 2: wage ~ poly(age, 2)
## Model 3: wage ~ poly(age, 3)
## Model 4: wage ~ poly(age, degree)
## Model 5: wage ~ poly(age, 5)
##   Res.Df     RSS Df Sum of Sq        F    Pr(>F)    
## 1   2998 5022216                                    
## 2   2997 4793430  1    228786 143.8118 < 2.2e-16 ***
## 3   2996 4777674  1     15756   9.9038  0.001666 ** 
## 4   2990 4756703  6     20971   2.1970  0.040533 *  
## 5   2994 4770322 -4    -13619   2.1401  0.073355 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lim=range(Wage$age)
grid=seq(lim[1],lim[2])
preds=predict(poly.fit,newdata=list(age=grid),se=TRUE)
plot(Wage$age,Wage$wage)
lines(grid,preds$fit,lwd=2,col='red',xlim=lim)

Cross-validation selects degree 9 (Model 4 above has 2990 residual df, i.e. a degree-9 polynomial); the ANOVA indicates that its gain over the cubic is only marginally significant (p ≈ 0.04), so a cubic would be a defensible simpler choice.

(b)

set.seed(1)
cv.error.10=rep(0,10)

for(i in 1:10){
  #Cut data first
 Wage$age.cut <- cut(Wage$age,i+1)
 glm.fit <- glm(wage~age.cut, data=Wage)
  cv.error.10[i] <- cv.glm(Wage, glm.fit, K=10)$delta[1]
#step.fit=glm(wage~cut(age,i+1),data=Wage)    
#cv.error.10[i]=cv.glm(Wage,step.fit,K=10)$delta[1]
}
number=which.min(cv.error.10)

plot(2:11,cv.error.10,type='b')

step.fit=glm(wage~cut(age,8),data=Wage)
preds=predict(step.fit,newdata=list(age=grid),se=TRUE)
se.bands=preds$fit + cbind(2*preds$se.fit, -2*preds$se.fit)
plot(Wage$age,Wage$wage)
lines(grid, preds$fit, lwd=2, col="blue")
matlines(grid, se.bands, lwd=1, col="blue", lty=3)

Cross-validation selects 8 intervals.

Q7

par(mfrow=c(2,1))
plot(Wage$maritl,Wage$wage)
plot(Wage$jobclass,Wage$wage)

Both maritl and jobclass are categorical variables. For jobclass, Information jobs appear to pay more than Industrial jobs; for maritl, married workers appear to earn the highest wages.

library(gam)
## Loading required package: splines
## Loading required package: foreach
## Loaded gam 1.16.1
gam.fit1 <- gam(wage~ns(age,4), data=Wage)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
gam.fit2.1 <- gam(wage~ns(age,4)+maritl, data=Wage)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
gam.fit2.2 <- gam(wage~ns(age,4)+jobclass, data=Wage)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
gam.fit3 <- gam(wage~ns(age,4)+maritl+jobclass, data=Wage)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
anova(gam.fit1, gam.fit2.1, gam.fit3)
## Analysis of Deviance Table
## 
## Model 1: wage ~ ns(age, 4)
## Model 2: wage ~ ns(age, 4) + maritl
## Model 3: wage ~ ns(age, 4) + maritl + jobclass
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1      2995    4773109                          
## 2      2991    4650697  4   122412 < 2.2e-16 ***
## 3      2990    4479913  1   170784 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(1,3))
plot(gam.fit3, se=TRUE, col="blue")

Q8

pairs(Auto)

It can be seen that `displacement`, `horsepower`, `weight` and `acceleration` may have nonlinear relationships with `mpg`.

gam.fit=gam(mpg~s(displacement,4)+s(horsepower,5)+year+cylinders,data=Auto)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
par(mfrow=c(1,4))
plot(gam.fit, se=TRUE, col="blue")
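
As a rough check (a sketch, not part of the original output), the smooth terms can be compared against a purely linear fit with an F test; a small p-value would support keeping the nonlinear terms:

# hypothetical follow-up: nested linear fit vs. the spline GAM above
gam.lin=gam(mpg~displacement+horsepower+year+cylinders,data=Auto)
anova(gam.lin,gam.fit,test='F')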

Q9

(a)

library(MASS)
fit=lm(nox~poly(dis,3),Boston)
summary(fit)
## 
## Call:
## lm(formula = nox ~ poly(dis, 3), data = Boston)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.121130 -0.040619 -0.009738  0.023385  0.194904 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.554695   0.002759 201.021  < 2e-16 ***
## poly(dis, 3)1 -2.003096   0.062071 -32.271  < 2e-16 ***
## poly(dis, 3)2  0.856330   0.062071  13.796  < 2e-16 ***
## poly(dis, 3)3 -0.318049   0.062071  -5.124 4.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06207 on 502 degrees of freedom
## Multiple R-squared:  0.7148, Adjusted R-squared:  0.7131 
## F-statistic: 419.3 on 3 and 502 DF,  p-value: < 2.2e-16
lim=range(Boston$dis)
grid=seq(lim[1],lim[2],0.1)
preds=predict(fit,newdata=list(dis=grid),se=TRUE)
se.bands <- preds$fit + cbind(2*preds$se.fit, -2*preds$se.fit)
plot(Boston$dis,Boston$nox)
lines(grid,preds$fit,lwd=2,col='red',xlim=lim)
matlines(grid,se.bands,col='red')

The intercept estimate is 0.554695, and the coefficients on the degree-1, 2 and 3 terms are -2.003096, 0.856330 and -0.318049 respectively; all are highly significant. Note that `poly()` uses an orthogonal polynomial basis by default, so these are not coefficients on raw powers of `dis`.

(b)

error=rep(0,10)
for(i in 1:10){
  fit=lm(nox~poly(dis,i),Boston)
  preds=predict(fit,Boston)
  error[i]=sum((preds-Boston$nox)^2)  # training RSS; equivalently sum(fit$residuals^2)
}
error
##  [1] 2.768563 2.035262 1.934107 1.932981 1.915290 1.878257 1.849484
##  [8] 1.835630 1.833331 1.832171
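
The training RSS decreases monotonically with the degree, as it must when the model is made more flexible.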

(c)

set.seed(1)
cv.error.10=rep(0,10)
for(i in 1:10){
poly.fit=glm(nox~poly(dis,i),data=Boston)
cv.error.10[i]=cv.glm(Boston,poly.fit,K=10)$delta[1]
}
which.min(cv.error.10)
## [1] 4

Cross-validation chooses degree 4.

(d)

library(splines)
fit=lm(nox~bs(dis,df=4),data=Boston)
attr(bs(Boston$dis,df=4),'knots')
##     50% 
## 3.20745
pred=predict(fit,newdata=list(dis=grid))
plot(Boston$dis,Boston$nox)
lines(grid,pred,lwd=2,col='red')
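
`bs()` with `df=4` places its knots automatically at uniform quantiles of the data; with a cubic basis this leaves a single interior knot, at the median of `dis` (≈ 3.21), as the `attr()` output above shows.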

(e)

par(mfrow=c(3,3))                 # nine fits (df = 3 to 11), one panel each
error=rep(0,9)
for(i in 3:11){
  fit=lm(nox~bs(dis,df=i),data=Boston)
  pred=predict(fit,newdata=list(dis=grid))
  plot(Boston$dis,Boston$nox,main=paste('df =',i))
  lines(grid,pred,lwd=2,col='red')
  error[i-2]=sum(fit$residuals^2) # training RSS
}

which.min(error)
## [1] 8

The smallest training RSS is obtained with 10 degrees of freedom (`error[8]` corresponds to df = 10).

(f)

set.seed(1)
cv.error.10=rep(0,7)
for(i in 4:10){
poly.fit=glm(nox~bs(dis,df=i),data=Boston)
cv.error.10[i-3]=cv.glm(Boston,poly.fit,K=10)$delta[1]
}
## Warning in bs(dis, degree = 3L, knots = c(`50%` = 3.23925), Boundary.knots
## = c(1.137, : some 'x' values beyond boundary knots may cause ill-
## conditioned bases
## (the same warning is repeated for each df and each CV fold; duplicates omitted)
which.min(cv.error.10)
## [1] 4

Using cross-validation, the optimal degrees of freedom is 7: `which.min` returns index 4, and index \(i\) of `cv.error.10` corresponds to df \(=i+3\).

Q10

(a)

library(leaps)
train_index=sample(nrow(College),7*nrow(College)/10)  # no seed is set, so the 70/30 split varies from run to run
train=College[train_index,]
test=College[-train_index,]

predict.regsubsets <- function(object, newdata, id, ...){
  form <- as.formula(object$call[[2]])
  mat <- model.matrix(form, newdata)
  coefi <- coef(object, id=id)
  xvars <- names(coefi)
  mat[,xvars]%*%coefi
}

# forward selection
reg.fwd=regsubsets(Outstate~.,data=train,method='forward',nvmax=17)
regsum=summary(reg.fwd)

err.fwd <- rep(0, ncol(College)-1)
for(i in 1:(ncol(College)-1)) {
  pred.fwd <- predict(reg.fwd, test, id=i)
  err.fwd[i] <- mean((test$Outstate - pred.fwd)^2)
}

par(mfrow=c(1,2))
plot(err.fwd,main="Test MSE", xlab="Number of Predictors")
min.mse <- which.min(err.fwd)  
points(min.mse, err.fwd[min.mse], col="red", pch=4, lwd=5)
plot(regsum$adjr2, type="b", main="Adjusted R^2", xlab="Number of Predictors")
max.adjr2 <- which.max(regsum$adjr2)  
points(max.adjr2, regsum$adjr2[max.adjr2], col="red", pch=4, lwd=5)
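
The test MSE levels off at around six predictors, so a six-variable model is a reasonable choice; part (b) fits a GAM using six predictors selected on this basis.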

(b)

gam.fit <- gam(Outstate ~ 
                 Private +   
                 s(Room.Board,3) + 
                 s(Terminal,3) + 
                 s(perc.alumni,3) +
               s(Expend,3) + 
                 s(Grad.Rate,3), 
               data=College)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
par(mfrow=c(2,3))
plot(gam.fit, se=TRUE, col="blue")

(c)

pred <- predict(gam.fit, test)
(mse.error <- mean((test$Outstate - pred)^2))
## [1] 2940903
err.fwd[6]
## [1] 3757382
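
The GAM's test MSE (about 2.94 million) is lower than that of the six-predictor forward-selection model (about 3.76 million). Note, however, that gam.fit above was trained on the full College data rather than on train alone, so this comparison flatters the GAM somewhat.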

(d)

summary(gam.fit)
## 
## Call: gam(formula = Outstate ~ Private + s(Room.Board, 3) + s(Terminal, 
##     3) + s(perc.alumni, 3) + s(Expend, 3) + s(Grad.Rate, 3), 
##     data = College)
## Deviance Residuals:
##      Min       1Q   Median       3Q      Max 
## -7110.16 -1137.02    50.44  1285.38  8278.86 
## 
## (Dispersion Parameter for gaussian family taken to be 3520187)
## 
##     Null Deviance: 12559297426 on 776 degrees of freedom
## Residual Deviance: 2675342725 on 760.0001 degrees of freedom
## AIC: 13936.36 
## 
## Number of Local Scoring Iterations: 2 
## 
## Anova for Parametric Effects
##                    Df     Sum Sq    Mean Sq F value    Pr(>F)    
## Private             1 3366732308 3366732308 956.407 < 2.2e-16 ***
## s(Room.Board, 3)    1 2549088628 2549088628 724.134 < 2.2e-16 ***
## s(Terminal, 3)      1  802254341  802254341 227.901 < 2.2e-16 ***
## s(perc.alumni, 3)   1  525154274  525154274 149.184 < 2.2e-16 ***
## s(Expend, 3)        1 1022010841 1022010841 290.329 < 2.2e-16 ***
## s(Grad.Rate, 3)     1  151344060  151344060  42.993 1.014e-10 ***
## Residuals         760 2675342725    3520187                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Anova for Nonparametric Effects
##                   Npar Df Npar F   Pr(F)    
## (Intercept)                                 
## Private                                     
## s(Room.Board, 3)        2  2.591 0.07557 .  
## s(Terminal, 3)          2  2.558 0.07815 .  
## s(perc.alumni, 3)       2  0.835 0.43446    
## s(Expend, 3)            2 56.179 < 2e-16 ***
## s(Grad.Rate, 3)         2  3.363 0.03515 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The "Anova for Parametric Effects" table tests the linear component of each term, while the "Anova for Nonparametric Effects" table tests for nonlinearity beyond that linear component.

Strong evidence of non-linear effects for Expend, some evidence for Room.Board, Terminal and Grad.Rate, and no evidence for perc.alumni.

Q11

(a)

set.seed(1)                 # seed added for reproducibility
epi=rnorm(100)              # noise term (also reused in Q12 below)
x1=rnorm(100)
x2=rnorm(100)
y=1+2*x1+3*x2+epi           # true coefficients: beta0=1, beta1=2, beta2=3

(b)

Let \(\hat\beta_1=5\) (any starting value will do).

(c)

beta1=5
a=y-beta1*x1                # partial residual with beta1 held fixed
beta2=lm(a~x2)$coef[2]

(d)

a=y-beta2*x2                # partial residual with beta2 (from part (c)) held fixed
beta1=lm(a~x1)$coef[2]

(e)

beta1=6                     # an arbitrary starting value
beta_0=rep(0,1000)
beta_1=rep(0,1000)
beta_2=rep(0,1000)
for(i in 1:1000){
  # one backfitting sweep: update beta2 with beta1 fixed, then beta1 with beta2 fixed
  a=y-beta1*x1
  beta2=lm(a~x2)$coef[2]
  a=y-beta2*x2
  beta1=lm(a~x1)$coef[2]
  beta_0[i]=lm(a~x1)$coef[1]   # intercept estimate from the same regression
  beta_1[i]=beta1
  beta_2[i]=beta2
}
x=seq(1000)
par(mfrow=c(3,1))
plot(x,beta_0,ylim=c(0,7))
plot(x,beta_1,col='red',ylim=c(0,7))
plot(x,beta_2,col='blue',ylim=c(0,7))

(f)

fit.lm=lm(y~x1+x2)
summary(fit.lm)
par(mfrow=c(3,1))
plot(x,beta_0,ylim=c(0,7))
abline(h=coef(fit.lm)[1], lty="dashed", lwd=3, col="brown")
plot(x,beta_1,col='red',ylim=c(0,7))
abline(h=coef(fit.lm)[2], lty="dashed", lwd=3, col="darkgreen")
plot(x,beta_2,col='blue',ylim=c(0,7))
abline(h=coef(fit.lm)[3], lty="dashed", lwd=3, col="darkblue")

(g)

In this dataset the estimates stabilize almost immediately: three iterations are already enough for the backfitting coefficients to coincide with the multiple regression estimates (the dashed lines above).
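
A quick numerical check (a sketch; the exact values depend on the random seed and are not shown here):

# compare the backfitting estimates after 3 iterations with lm()
cbind(backfit=c(beta_0[3],beta_1[3],beta_2[3]),
      lm=coef(fit.lm))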

Q12

n=100
p=100
data=matrix(0, ncol=p, nrow=n)   # columns are filled in below
set.seed(1)
y=rep(0,n)
for(i in 1:p){
  data[,i]=rnorm(n)
  y=y+i*data[,i]                 # true coefficient on X_i is i
}
y=y+epi                          # reuses the noise vector from Q11 (a)
data=data.frame(data)
mse.error <- rep(0, 2000)
beta=data.frame(sample(100))     # random starting values for the 100 coefficients
##Backfitting
for(i in 1:2000){
  for(j in 1:100){
    k=100-j+1                    # update the coefficients in reverse order
    a=y-as.matrix(data[,-k])%*%as.matrix(beta[-k,])
    beta[k,]=lm(a~data[,k])$coef[2]
  }
  mse.error[i] <- mean((y - as.matrix(data)%*%as.matrix(beta))^2)
}
plot(900:1000,mse.error[900:1000])

The MSE is essentially flat over this window, so the backfitting estimates converged long before the 2000th iteration.